	// Start from an empty session and switch off output paging.
	clear all
	set more off

	// Project root, stored in positional macro 1. The alternatives are kept
	// commented out; exactly one assignment is active.
	*local 1 "C:/Users/lcarvalh/Documents/UKB"
	 local 1 "/Users/lcarvalh/Documents/UKB"
	*local 1 "//econ-fs/Home4/uctpscc/UKB"

	 *do "`1'/dofiles/_Merge" "`1'" 
	 
	 
	 // Both directory changes run under capture, so a failure is ignored and
	 // the relative path passed to -use- is resolved from whatever directory
	 // is current at that point.
	 cap cd /
	 cap cd "`1'"
	 use "data/secondary/_Merge.dta", clear
	 */
	 
	******************************
	*** Gender
	******************************

	// n_31_0_0 is kept as-is under the name male.
	rename n_31_0_0 male
	label var male "1 if Male"

	**************************
	***    Country of birth
	**************************

	// n_1647_0_0 becomes country_of_birth; only codes 1-3 get value labels.
	rename n_1647_0_0 country_of_birth
	label define country_of_birth 1 "England" 2 "Wales" 3 "Scotland"
	label values country_of_birth country_of_birth

	// Birth-country dummies: 0 for every other code, including missing.
	gen Wales    = country_of_birth == 2
	gen Scotland = country_of_birth == 3
	label var Wales    "1 if born in Wales"
	label var Scotland "1 if born in Scotland"

	// Discard whatever other n_1647_* columns remain.
	drop n_1647_*

	**************************
	***    Ethnicity
	**************************

	// Ethnic-group dummies built from n_21000_0_0. Each dummy is 1 for the
	// codes listed and 0 for any other value (missing included).
	gen white           = inlist(n_21000_0_0, 1, 1001, 1002, 1003)
	gen mixed_ethnicity = inlist(n_21000_0_0, 2, 2001, 2002, 2003, 2004)
	gen asian           = inlist(n_21000_0_0, 3, 3001, 3002, 3003, 3004)
	gen black           = inlist(n_21000_0_0, 4, 4001, 4002, 4003, 4004)
	gen other_ethnicity = inlist(n_21000_0_0, 5, 6)
	// Codes -1 and -3 are the "did not know / did not report" answers.
	gen NR_ethnicity    = inlist(n_21000_0_0, -1, -3)

	label var white           "1 if white"
	label var mixed_ethnicity "1 if mixed background"
	label var asian           "1 if asian"
	label var black           "1 if black"
	label var other_ethnicity "1 if other ethnicity"
	label var NR_ethnicity    "1 if did not know/report ethnicity"

	drop n_21000_*
		
	**************************
	*** 	Date of birth
	**************************

	rename n_52_0_0  month_birth
	rename n_34_0_0  year_birth
	rename ts_33_0_0 date_birth

	// Running variable: date of birth measured in days relative to
	// 1 September 1957 (negative = born before that date).
	gen DoB = date_birth - mdy(9, 1, 1957)
	label var DoB "Date of birth in days"

	******************************
	*** Age at assessment
	******************************

	// Age in days: ts_53_0_0 minus date of birth.
	gen age = ts_53_0_0 - date_birth

	label variable age "Age in days at time of assessment"

	// Stored as double on purpose: a float represents integers exactly only up
	// to 16,777,216, and an age in days squared is far larger than that, so
	// the default float type would silently round the squared term.
	gen double age2 = age ^ 2

	label variable age2 "Age in days squared"


	drop n_55_*
	drop ts_53_*

	******************************
	*** Post and post-trend
	******************************

	// Treatment indicator and its interaction with the running variable.
	// NOTE: Stata treats missing as larger than any number, so a missing DoB
	// yields after == 1 here; such rows are removed later by -drop if DoB==.-.
	gen after    = DoB >= 0
	gen DoBafter = after * DoB

	label var after    "1 if born after Sep 1, 1957"
	label var DoBafter "DoB x after"
	
	*********************************
	*** Month of birth fixed effects
	*********************************

	// Month-of-birth dummies. No dummy is generated for September
	// (month_birth == 9); presumably it serves as the reference month.
	gen Jan = month_birth == 1 
	gen Feb = month_birth == 2
	gen Mar = month_birth == 3
	gen Apr = month_birth == 4
	gen May = month_birth == 5
	gen Jun = month_birth == 6
	gen Jul = month_birth == 7
	gen Aug = month_birth == 8
	gen Oct = month_birth == 10
	gen Nov = month_birth == 11
	gen Dec = month_birth == 12

	label variable Jan "Born in January"
	label variable Feb "Born in February"
	label variable Mar "Born in March"
	label variable Apr "Born in April"
	label variable May "Born in May"
	label variable Jun "Born in June"
	label variable Jul "Born in July"
	label variable Aug "Born in August"
	label variable Oct "Born in October"
	label variable Nov "Born in November"
	label variable Dec "Born in December"



	// List of the month dummies created above. The previous list named Sep,
	// which does not exist, and left out Aug, which does.
	local MoB "Jan Feb Mar Apr May Jun Jul Aug Oct Nov Dec"

	******************************
	*** School leaving age
	******************************
	 
	rename n_845_0_0 SLA

	// Temporary flag: 1 when any of the six n_6138 answer slots holds code 1.
	gen college = .
	forvalues slot = 0/5 {
		replace college = 1 if n_6138_0_`slot' == 1
	}

	// Negative codes carry no leaving age; flagged respondents are assigned 22.
	replace SLA = .  if SLA < 0
	replace SLA = 22 if college == 1

	// Stayed-on indicators for ages 16 to 19 (missing when SLA is missing).
	forvalues a = 16/19 {
		gen edu`a' = (SLA >= `a') if SLA != .
	}

	drop n_845_* college

	label var SLA "School leaving age"
	forvalues a = 16/19 {
		label var edu`a' "1 if stayed in school until `a'"
	}

	******************************
	*** Qualifications
	******************************

	// answered == 1 when at least one of the six n_6138 slots holds a
	// substantive answer (codes 1-6) or code -7.
	tempvar answered
	gen `answered' = 0
	forvalues slot = 0/5 {
		replace `answered' = 1 if inrange(n_6138_0_`slot', 1, 6) | n_6138_0_`slot' == -7
	}

	// Single-code qualifications: college = 1, Alevel = 2, Olevel = 3, CSE = 4.
	// Each dummy is 0 for anyone who answered, 1 if the code appears in any
	// slot, and missing otherwise.
	local code = 0
	foreach qual in college Alevel Olevel CSE {
		local ++code
		gen `qual' = 0 if `answered' == 1
		forvalues slot = 0/5 {
			replace `qual' = 1 if n_6138_0_`slot' == `code'
		}
	}

	// Either code 3 or code 4.
	gen CSE_Olevel = 0 if `answered' == 1
	forvalues slot = 0/5 {
		replace CSE_Olevel = 1 if inlist(n_6138_0_`slot', 3, 4)
	}

	// Code -7 is only looked for in the first slot.
	gen no_qualification = 0 if `answered' == 1
	replace no_qualification = 1 if n_6138_0_0 == -7


	label var college          "1 if has college degree"
	label var Alevel           "1 if has A-level"
	label var CSE              "1 if has CSE"
	label var Olevel           "1 if has O-level"
	label var CSE_Olevel       "1 if has CSE or O-level"
	label var no_qualification "1 if no formal qualification"
		
	******************************
	*** Years of Schooling
	******************************

	// Years of schooling implied by the qualification codes in n_6138.
	// The table is processed left to right, so when several codes are present
	// the assignment made last (the rightmost matching entry) is the one kept.
	//   code : -7   3   4   2   6   5   1
	//   years:  7  10  10  13  15  19  20
	local qual_codes "-7 3 4 2 6 5 1"
	local qual_years "7 10 10 13 15 19 20"

	gen schooling = .
	forvalues p = 1/7 {
		local c : word `p' of `qual_codes'
		local y : word `p' of `qual_years'
		forvalues slot = 0/5 {
			replace schooling = `y' if n_6138_0_`slot' == `c'
		}
	}

	label var schooling "Years of schooling"

	drop n_6138_*
	
	
	******************************
	*** Blood pressure
	******************************

	// NOTE1: We are keeping participants for whom only one measure of 
	// blood pressure was taken
	// NOTE2: We are taking averages if there are multiple measures. 

	// Availability flags (1 = reading is non-missing). In this block n_4079 and
	// n_94 feed the diastolic measures, n_4080 and n_93 the systolic ones;
	// suffix 1/2 is the first/second reading.
	gen byte auto_d1 = (n_4079_0_0 ~= .)
	gen byte auto_d2 = (n_4079_0_1 ~= .)
	gen byte auto_s1 = (n_4080_0_0 ~= .)
	gen byte auto_s2 = (n_4080_0_1 ~= .)
	gen byte manual_s1 = (n_93_0_0 ~= .)
	gen byte manual_s2 = (n_93_0_1 ~= .)
	gen byte manual_d1 = (n_94_0_0 ~= .)
	gen byte manual_d2 = (n_94_0_1 ~= .)

	// Number of available readings per person by device type and in total.
	foreach type in s d {
		gen auto_`type' 	= auto_`type'1   + auto_`type'2
		gen manual_`type' 	= manual_`type'1 + manual_`type'2
		gen total_`type' 	= auto_`type' + manual_`type' 
	}

	// Diagnostic cross-tabs only; nothing below depends on them.
	tab auto_s manual_s, mi
	tab auto_d manual_d, mi

	drop auto_* manual_* total_s total_d

	// Per-reading value: row mean over the two source variables, which ignores
	// missing entries (so a single available source is used as-is).
	egen bps1 = rowmean(n_4080_0_0 n_93_0_0)
	egen bpd1 = rowmean(n_4079_0_0 n_94_0_0)

	egen bps2 = rowmean(n_4080_0_1 n_93_0_1)
	egen bpd2 = rowmean(n_4079_0_1 n_94_0_1)

	// Average across the two readings (again ignoring missing readings).
	egen bps = rowmean(bps1 bps2)
	egen bpd = rowmean(bpd1 bpd2)

	label variable bps "Blood pressure systolic"
	label variable bpd "Blood pressure diastolic"


	// Mean arterial pressure per reading: (2 x diastolic + systolic) / 3.
	gen map1 = (2*bpd1 + bps1)/3
	gen map2 = (2*bpd2 + bps2)/3

	egen map = rowmean(map1 map2)
	label variable map "Mean arterial pressure"

	// Pulse pressure per reading: systolic minus diastolic.
	gen pp1 = bps1 - bpd1
	gen pp2 = bps2 - bpd2

	egen pp = rowmean(pp1 pp2)
	label variable pp "Pulse pressure"

	egen pr = rowmean(n_102_0_0 n_102_0_1)
	label variable pr "Pulse rate"

	// Remove per-reading intermediates and the raw source columns.
	drop bps1 bps2 bpd1 bpd2 map1 map2 pp1 pp2
	drop n_4079_* n_4080_*
	drop n_93_* n_94_* n_95_*
	drop n_102_*

	******************************
	*** High Blood pressure DIAGNOSIS
	******************************

	// 0 when any of the four n_6150 slots holds codes 1-3 or -7; overridden to
	// 1 when any slot holds code 4; missing when neither applies.
	gen bp_diagnosis = .
	forvalues slot = 0/3 {
		replace bp_diagnosis = 0 if inrange(n_6150_0_`slot', 1, 3) | n_6150_0_`slot' == -7
	}
	forvalues slot = 0/3 {
		replace bp_diagnosis = 1 if n_6150_0_`slot' == 4
	}

	label var bp_diagnosis "1 if diagnosed w/ high blood pressure"
	
	******************************
	*** Blood pressure medication
	******************************
	
	// Blood-pressure medication indicator built from n_6153 and n_6177.
	// Respondents can give several answers, stored in successive array slots
	// (n_*_0_0, n_*_0_1, ...). Every slot present in the data is scanned, as is
	// done for n_6150 and n_6138 elsewhere in this file; looking at slot 0
	// alone would code someone whose code-2 answer sits in a later slot as 0.
	gen bp_medication = .
		// Any valid non-BP answer marks the respondent as having answered.
		foreach v of varlist n_6153_0_* {
			replace bp_medication = 0	if inlist(`v', 1, 3, 4, 5, -7)
		}
		foreach v of varlist n_6177_0_* {
			replace bp_medication = 0	if inlist(`v', 1, 3, -7)
		}
		// Code 2 in any slot of either field overrides the 0.
		foreach v of varlist n_6153_0_* n_6177_0_* {
			replace bp_medication = 1	if `v' == 2
		}

	label variable bp_medication "1 if taking blood pressure medication"
	
	drop n_6177_* n_6153_*
		
	******************************
	*** Anthropometrics
	******************************

	// Two BMI measures are kept side by side, plus their row mean.
	ren n_23104_0_0 BMI1
	label variable BMI1 "BMI - Impedance"

	ren n_21001_0_0 BMI2
	// Label corrected: BMI is weight divided by height squared.
	label variable BMI2 "BMI - weight / height^2"

	// rowmean ignores missing components, so BMI equals whichever measure is
	// available when only one is.
	egen BMI = rowmean(BMI1 BMI2)
	label variable BMI "Average of BMI1 and BMI2" 

	// Overweight / obesity thresholds applied to each BMI version; the
	// indicators stay missing when the underlying BMI is missing.
	gen overweight=(BMI>=25) if BMI~=.
	label variable overweight "1 if BMI >= 25"
	gen obese=(BMI>=30) if BMI~=.
	label variable obese "1 if BMI >= 30"

	gen overweight1=(BMI1>=25) if BMI1~=.
	label variable overweight1 "1 if BMI1 >= 25"
	gen obese1=(BMI1>=30) if BMI1~=.
	label variable obese1 "1 if BMI1 >= 30"

	gen overweight2=(BMI2>=25) if BMI2~=.
	label variable overweight2 "1 if BMI2 >= 25"
	gen obese2=(BMI2>=30) if BMI2~=.
	label variable obese2 "1 if BMI2 >= 30"

	// Leg length = n_50_0_0 (renamed height below) minus n_20015_0_0 (renamed
	// trunk_length below).
	gen leg_length = n_50_0_0 - n_20015_0_0
	label variable leg_length "Leg length"
	
	ren n_50_0_0 height
	label variable height "Height in cms"

	ren n_21002_0_0 weight   
	label variable weight "Weight in kilos"
			 
	ren n_48_0_0 waist           
	ren n_49_0_0 hip  

	gen waist_hip=waist/hip
	label variable waist_hip "Waist-hip ratio"
	drop waist hip
			   
	ren n_23099_0_0 bf_percent
	ren n_20015_0_0 trunk_length
	label variable trunk_length "Trunk length"


	// Remove remaining columns of the fields used above.
	drop n_23104_* 

	drop n_48_* n_49_* n_50_*  n_51_*
	drop n_21001_* n_21002_* n_23099_*
	drop n_3077_* // n_20015_* 

	******************************
	*** Spirometry
	******************************

	// PROCEDURE ADOPTED BY DE MATTEIS et al. (2016): "Occupations associated with COPD risk in the large population-based UK Biobank cohort study."
	// The following error messages were considered to specifically identify those manoeuvres that were acceptable by inspection and other criteria: 
	// ’Blank’ (i.e. no error message), ‘Accept’, ‘Accept Below6sec’ (acceptable blow, with plateau even though expiration time less than 6 seconds), ‘Below6sec’ (expiration below six seconds, but no other error message in the manoeuvre). 

	// --- Rule 1: flag each of the three blows (array slots 0-2) as acceptable.
	forvalues i = 0/2 {
		gen acceptcurv`i' = 0
			// criteria for acceptance of blow
			replace acceptcurv`i' = 1 if s_20031_0_`i' == "ACCEPT" | s_20031_0_`i' == "BELOW6SEC" | s_20031_0_`i' == "BELOW6SEC ACCEPT" | s_20031_0_`i' == ""
			// smoked or used inhaler
			replace acceptcurv`i' = 0 if n_3090_0_0 == 1 | n_3159_0_0 == 1
			// no blow
			replace acceptcurv`i' = 0 if n_3063_0_`i' == . // necessary because field 20031 is empty when there was no blow		
	}
	egen acceptall1 = rowmax(acceptcurv0 acceptcurv1 acceptcurv2) // at least one blow accepted
	drop s_20031_* n_3090_* n_3159_*


	// Copy each blow's FVC (n_3062), FEV1 (n_3063) and PEF (n_3064) and blank
	// out the copies that belong to a rejected blow.
	forvalues i = 0/2 {
		clonevar temp_fvc_`i'  = n_3062_0_`i'
		replace  temp_fvc_`i'  = . 			 if acceptcurv`i' == 0
		clonevar temp_fev1_`i' = n_3063_0_`i'
		replace  temp_fev1_`i' = . 			 if acceptcurv`i' == 0
		clonevar temp_pef_`i'  = n_3064_0_`i'
		replace  temp_pef_`i'  = . 			 if acceptcurv`i' == 0
	}		

			
	// Best (maximum) value over the accepted blows, each measure taken
	// separately; missing unless at least one blow was accepted.
	egen fvc_1  = rowmax(temp_fvc_0  temp_fvc_1  temp_fvc_2)  if  acceptall1==1
	egen fev1_1 = rowmax(temp_fev1_0 temp_fev1_1 temp_fev1_2) if  acceptall1==1
	egen pef_1  = rowmax(temp_pef_0  temp_pef_1  temp_pef_2)  if  acceptall1==1

	drop temp_fvc_* temp_fev1_* temp_pef_*



	// PROCEDURE ADOPTED BY WAIN et al. (2015): "Novel insights into the genetics of smoking behaviour, lung function, and chronic obstructive pulmonary disease..."
	// (see notes of fields 20150 and 20151), a blow was deemed acceptable if recorded in field 3061 as 0 (no problems) or 32 ("user accepted") 
	// --- Rule 2: flags are stored as acceptcurv3-5, where index i maps to
	// blow j = i - 3.
	forvalues i = 3/5 {
		local j = `i' - 3
		gen acceptcurv`i' = 0
			// blow meets ERS/ATS guidelines + criteria for acceptance of blow
			replace acceptcurv`i' = 1 if n_20152_0_0 == 1 & (n_3061_0_`j' == 0 | n_3061_0_`j' == 32)
			// method of spirometry missing
			replace acceptcurv`i' = 0 if n_23_0_0 == .	
			// no blow
			replace acceptcurv`i' = 0 if n_3063_0_`j' == .	// not really necessary; field 3061 is missing if there is no blow	
	}
	// Count of accepted blows (0-3); the selection below requires 2 or 3.
	egen acceptall2 = rowtotal(acceptcurv3 acceptcurv4 acceptcurv5) // at least two blows accepted
	drop n_3061_* n_20152_* n_23_*

	// Same masking as above, now driven by the rule-2 flags (blow i uses
	// acceptcurv j = i + 3).
	forvalues i = 0/2 {
		local j = `i' + 3
		clonevar temp_fvc_`i'  = n_3062_0_`i'
		replace  temp_fvc_`i'  = . 			 if acceptcurv`j' == 0
		clonevar temp_fev1_`i' = n_3063_0_`i'
		replace  temp_fev1_`i' = . 			 if acceptcurv`j' == 0
		clonevar temp_pef_`i'  = n_3064_0_`i'
		replace  temp_pef_`i'  = . 			 if acceptcurv`j' == 0
	}		

			
	egen fvc_2  = rowmax(temp_fvc_0  temp_fvc_1  temp_fvc_2)  if  acceptall2==2 | acceptall2==3
	egen fev1_2 = rowmax(temp_fev1_0 temp_fev1_1 temp_fev1_2) if  acceptall2==2 | acceptall2==3
	egen pef_2  = rowmax(temp_pef_0  temp_pef_1  temp_pef_2)  if  acceptall2==2 | acceptall2==3

	drop temp_fvc_* temp_fev1_* temp_pef_*
	drop n_3062_* n_3063_* n_3064_*


	// FEV1/FVC ratio under each of the two acceptability rules.
	gen fev1ratio_1 = fev1_1/fvc_1
	gen fev1ratio_2 = fev1_2/fvc_2


	// Author name in the labels corrected to "De Matteis", matching the
	// citation given in the spirometry section comments.
	label variable fvc_1 		"FVC - De Matteis et al. 2016"
	label variable fev1_1 		"FEV1 - De Matteis et al. 2016"
	label variable pef_1 		"PEF - De Matteis et al. 2016"
	label variable fev1ratio_1	"FEV1/FVC - De Matteis et al. 2016"

	label variable fvc_2 		"FVC - Wain et al. 2015"
	label variable fev1_2 		"FEV1 - Wain et al. 2015"
	label variable pef_2 		"PEF - Wain et al. 2015"
	label variable fev1ratio_2	"FEV1/FVC - Wain et al. 2015"

	drop n_3089_* n_20150_* n_20151_* acceptcurv* acceptall*

	// External do-file, resolved relative to the current directory.
	// NOTE(review): presumably this creates the n_fev1_*, n_fvc_* and n_pef_*
	// variables referenced by the later -order- command; confirm.
	do "dofiles/_Predict_Spirometry"
	
	// n_6150 is dropped only after the external do-file has run.
	drop n_6150_*
	
	******************************
	*** HbA1c
	******************************	
	
	// Linear rescaling of n_30750_0_0.
	// NOTE(review): the constants look like the IFCC (mmol/mol) to NGSP (%)
	// conversion; confirm the unit of n_30750_0_0.
	gen HbA1c = (0.09148*n_30750_0_0) + 2.152
	
	// Mutually exclusive glycaemic categories. All three are left missing when
	// HbA1c is missing. The guard on prediabetes matters: without it a missing
	// HbA1c evaluates (5.7 <= . & . < 6.5) to 0 and would be coded as
	// "not prediabetic" while the other two indicators are missing.
	gen normal = 		(HbA1c < 5.7) if HbA1c < .
	gen prediabetes = 	(5.7 <= HbA1c & HbA1c < 6.5) if HbA1c < .
	gen diabetes = 		(HbA1c >= 6.5) if HbA1c < .
	
	drop n_30750_* n_30740_*
	
	******************************
	*** Dropping unused variables
	******************************	
	
	// Columns not used anywhere in the rest of this script. -drop- stops with
	// an error if any of these patterns matches no variable.
	drop n_46_* n_47_* n_21021_* n_3083_* n_3147_* n_1687_* n_2734_* n_2754_* n_1697_* n_2764_*

	********************************************
	** Average total household income before tax
	********************************************
	
	// Cumulative income-band dummies from n_738_0_0 (bands coded 1, 2, 3, ...).
	// They are missing when the answer is system-missing, -1 or -3.
	local inc_known "n_738_0_0 != . & !inlist(n_738_0_0, -1, -3)"

	gen inc_18k_orless  = (n_738_0_0 == 1)              if `inc_known'
	gen inc_31k_orless  = inlist(n_738_0_0, 1, 2)       if `inc_known'
	gen inc_52k_orless  = inlist(n_738_0_0, 1, 2, 3)    if `inc_known'
	gen inc_100k_orless = inlist(n_738_0_0, 1, 2, 3, 4) if `inc_known'

	label var inc_18k_orless  "Hhld income of 18,000 or less"
	label var inc_31k_orless  "Hhld income of 30,999 or less"
	label var inc_52k_orless  "Hhld income of 51,999 or less"
	label var inc_100k_orless "Hhld income of 100,000 or less"

	// Categorical version: a copy of the raw variable with -1 and -3 blanked.
	clonevar hhld_income = n_738_0_0
	replace hhld_income = . if inlist(hhld_income, -1, -3)
	label var hhld_income "Hhld income"

	drop n_738_*
	
	**********************************
	** Number of vehicles in household
	**********************************

	// Cumulative vehicle-count dummies from n_728_0_0 (code 1 is labelled
	// "No cars" below). Missing when the answer is system-missing, -1 or -3.
	local car_known "n_728_0_0 != . & !inlist(n_728_0_0, -1, -3)"

	gen car_0        = (n_728_0_0 == 1)              if `car_known'
	gen car_1_orless = inlist(n_728_0_0, 1, 2)       if `car_known'
	gen car_2_orless = inlist(n_728_0_0, 1, 2, 3)    if `car_known'
	gen car_3_orless = inlist(n_728_0_0, 1, 2, 3, 4) if `car_known'

	label var car_0        "No cars"
	label var car_1_orless "1 car or less"
	label var car_2_orless "2 cars or less"
	label var car_3_orless "3 cars or less"

	drop n_728_*

	**************************************
	** Own or rent accommodation lived in
	**************************************
	
	// Owner indicator: 1 for codes 1 or 2 of n_680_0_0, 0 for other valid
	// answers, missing when the answer is system-missing, -1 or -3.
	gen own = (n_680_0_0==1|n_680_0_0==2)
		replace own=. if n_680_0_0==.|n_680_0_0==-1|n_680_0_0==-3
	
	// Label spelling corrected ("accommodation").
	label variable own "1 if owns accommodation where lives"
	
	drop n_680_*
	
	********************************************
	** Townsend deprivation index at recruitment
	********************************************

	rename n_189_0_0 townsend
	label var townsend "Townsend deprivation index"
	
	******************************
	** Current employment status
	******************************

	// Only the first answer slot of n_6142 is used. Both dummies are missing
	// when that slot is system-missing or -3.
	local emp_known "n_6142_0_0 != . & n_6142_0_0 != -3"

	gen employed = (n_6142_0_0 == 1) if `emp_known'
	label var employed "1 if employed"

	gen retired = (n_6142_0_0 == 2) if `emp_known'
	label var retired "1 if retired"
	
	drop n_6142_*
	
	******************************
	*** Smoking
	******************************
	
	// Current smoking from n_1239_0_0: 0 = no; codes 1 and 2 both count as
	// currently smoking; any other value leaves the indicator missing.
	gen smokes_now = .
		replace smokes_now = 0	if n_1239_0_0 == 0
		replace smokes_now = 1	if n_1239_0_0 == 1 | n_1239_0_0 == 2

	label variable smokes_now "1 if currently smokes"
	
	// Ever-smoker classification. The rules are applied in sequence, so a
	// later rule overrides an earlier one for rows that satisfy both.
	gen ever_smoked = .
		// Never-smoker if past tobacco smoking = never (4)
		replace ever_smoked = 0	if n_1249_0_0 == 4 
		// Never-smoker if past tobacco smoking = occasionally (2) or tried once or twice (3) AND smoked LESS than 100 cigarettes in lifetime
		replace ever_smoked = 0	if (n_1249_0_0 == 2 | n_1249_0_0 == 3) & n_2644_0_0 == 0  
		// Ever-smoker if past tobacco smoking = occasionally (2) or tried once or twice (3) AND smoked MORE than 100 cigarettes in lifetime
		replace ever_smoked = 1	if (n_1249_0_0 == 2 | n_1249_0_0 == 3) & n_2644_0_0 == 1  
		// Ever-smoker if past tobacco smoking = most days (1)
		replace ever_smoked = 1	if n_1249_0_0 == 1 
		// Ever-smoker if current tobacco smoking = most days (1) AND smokes (hand-rolled or manufactured) cigarettes
		replace ever_smoked = 1	if n_1239_0_0 == 1 & n_3446_0_0 >= 1 & n_3446_0_0 <= 2   
		// Ever-smoker if current tobacco smoking = most days (1) AND smokes cigars/pipes AND used to smoke on most/all days
		replace ever_smoked = 1	if n_1239_0_0 == 1 & n_3446_0_0 == 3 & n_5959_0_0 == 1   
		// Never-smoker if current tobacco smoking = most days (1) AND smokes cigars/pipes AND n_5959_0_0 == 0
		// (by contrast with the rule above, presumably "did NOT previously smoke on most/all days" -- confirm coding)
		replace ever_smoked = 0	if n_1239_0_0 == 1 & n_3446_0_0 == 3 & n_5959_0_0 == 0   
		
	label variable ever_smoked "1 if ever smoked"

	drop n_1239_* n_1249_* n_2644_* n_2867_* n_2887_* n_2897_* n_2907_* n_3436_* n_3446_* n_3456_* n_3486_*  n_5959_* n_6183_* n_6194_* n_20116_* 
	
	
	******************************
	*** Diet
	******************************

	drop ts_105010_*0
	
	// One row of the table below per diet field:
	//   source field | new stub      | multiplier applied to the raw value
	//   100002       | calories      | 0.239 (converting from Kj to calories)
	//   100004       | fat           | 9     (fat provides 9 calories per gram)
	//   100005       | carbs         | 4     (carbohydrates provide 4 calories per gram)
	//   100006       | saturated_fat | 9     (saturated fat provides 9 calories per gram)
	//   100008       | sugars        | 3.87  (sugars provide 3.87 calories per gram)
	// Instances 0-4 of each field (n_<field>_<instance>_0) become <stub>1-<stub>5.
	local diet_fields  "100002 100004 100005 100006 100008"
	local diet_stubs   "calories fat carbs saturated_fat sugars"
	local diet_factors "0.239 9 4 9 3.87"

	forvalues k = 1/5 {
		local field  : word `k' of `diet_fields'
		local stub   : word `k' of `diet_stubs'
		local factor : word `k' of `diet_factors'
		forvalues i = 1/5 {
			local inst = `i' - 1
			ren n_`field'_`inst'_0 `stub'`i'
			replace `stub'`i' = `stub'`i' * `factor'
		}
	}
	
	// Express each nutrient as a fraction of total calories. This runs as a
	// separate step, after every variable has been converted above.
	forvalues i = 1/5 {
		foreach var in fat carbs saturated_fat sugars {
			replace `var'`i' = `var'`i' / calories`i' // as a fraction of total calories
		}
	}
	
	// Labels. carbs1-carbs5 were previously the only nutrient variables left
	// without a label.
	forvalues i = 1/5 {
		label variable calories`i' 		"Calories"
		label variable fat`i'			"Fat"
		label variable carbs`i'			"Carbohydrates"
		label variable saturated_fat`i'	"Saturated fat"
		label variable sugars`i' 		"Sugars"
	}
	
	drop n_100009_*
	
	******************************
	*** Accelerometer
	******************************
	
	// Plain renames; the acc_ prefix is what the later -order- command matches
	// with the acc_* wildcard.
	ren n_90012_0_0 acc_average 
	ren n_90087_0_0 acc_average_nowear 
	ren n_90089_0_0 acc_median_nowear 
	*/
	******************************
	*** Residential air pollution
	******************************
	
	// NO2: row mean of the 2005, 2006, 2007 and 2010 measures (missing years
	// are ignored by rowmean).
	egen NO2 = rowmean(n_24016_0_0 n_24017_0_0 n_24018_0_0 n_24003_0_0)

	rename n_24004_0_0 NO

	// PM10: row mean of the 2007 and 2010 measures.
	egen pm10 = rowmean(n_24019_0_0 n_24005_0_0)
	rename n_24006_0_0 pm25

	// The year-specific inputs are not kept.
	drop n_24016_0_0 n_24017_0_0 n_24018_0_0 n_24003_0_0 n_24019_0_0 n_24005_0_0

	label var NO2  "Nitrogen dioxide air pollution"
	label var NO   "Nitrogen oxides air pollution"
	label var pm25 "Particulate matter air pollution diameter<=2.5"
	label var pm10 "Particulate matter air pollution diameter<=10"

	drop n_24007_* n_24008_*
	
	******************************
	*** early life factors
	******************************
	
	// Handedness dummies from n_1707_0_0 (1 = right, 2 = left, 3 = both, as
	// implied by the labels below); missing for any other value.
	local hand_known "inlist(n_1707_0_0, 1, 2, 3)"
	gen right_handed = (n_1707_0_0 == 1) if `hand_known'
	gen left_handed  = (n_1707_0_0 == 2) if `hand_known'
	gen ambidextrous = (n_1707_0_0 == 3) if `hand_known'

	label var right_handed "1 if right-handed"
	label var left_handed  "1 if left-handed"
	label var ambidextrous "1 if ambidextrous"

	drop n_1707_*

	// Adopted: only answers 0 and 1 are kept, everything else is missing.
	gen adopted = (n_1767_0_0 == 1) if inlist(n_1767_0_0, 0, 1)
	drop n_1767_*
	label var adopted "1 if adopted"

	// Multiple birth: same treatment as adopted.
	gen twins = (n_1777_0_0 == 1) if inlist(n_1777_0_0, 0, 1)
	drop n_1777_*
	label var twins "1 if part of a multiple birth"

	// Birthplace coordinates: flag rows where either coordinate is coded -1,
	// then blank the -1 codes in the renamed coordinates.
	gen mbirthplace = (n_130_0_0 == -1 | n_129_0_0 == -1)
	rename n_130_0_0 birthplace_East
	rename n_129_0_0 birthplace_North
	foreach coord in birthplace_East birthplace_North {
		replace `coord' = . if `coord' == -1
	}

	drop n_130_* n_129_*
	
	// Breastfed indicator from n_1677_0_0. "Do not know" (-1) is folded into 0
	// here and flagged separately in breastfed_DNK.
	gen breastfed = .
		replace breastfed = 1	if n_1677_0_0 == 1
		replace breastfed = 0	if n_1677_0_0 == 0 | n_1677_0_0 == -1
	label variable breastfed 	"1 if breastfed"
	
	gen breastfed_DNK = .
		replace breastfed_DNK = 1	if n_1677_0_0 == -1
		replace breastfed_DNK = 0	if n_1677_0_0 == 0 | n_1677_0_0 == 1
	// This label belongs to breastfed_DNK; it used to be applied to breastfed,
	// overwriting the label set above and leaving breastfed_DNK unlabeled.
	label variable breastfed_DNK 	"Don't know if breastfed"
	
	// Maternal smoking around birth from n_1787_0_0, built the same way.
	gen msmoking = .
		replace msmoking = 1	if n_1787_0_0 == 1
		replace msmoking = 0	if n_1787_0_0 == 0 | n_1787_0_0 == -1
	label variable msmoking 	"1 if mother smoked around birth"
	
	gen msmoking_DNK = .
		replace msmoking_DNK = 1	if n_1787_0_0 == -1
		replace msmoking_DNK = 0	if n_1787_0_0 == 0 | n_1787_0_0 == 1
	label variable msmoking_DNK 	"DNK if mother smoked around birth"
	
	drop n_1677_* n_1787_* 
	
	
	**************************
	*** Sample restrictions
	**************************

	// Keep rows with a known date of birth and a birth country coded 1, 2 or 3.
	keep if DoB != .
	keep if inlist(country_of_birth, 1, 2, 3) // England, Wales, and Scotland 

	**************************
	*** Order of variables
	**************************

	// Participant identifier.
	ren n_eid ID
	label variable ID "ID"

	// These blood-pressure derivatives and the raw country code are not kept
	// in the final dataset.
	drop map pp pr
	drop country_of_birth

	
	// Fix the column order: every variable listed is placed, in this sequence,
	// directly after ID. Variables not listed keep their relative position.
	// -order- fails if a listed name or wildcard matches nothing.
	order after DoB DoBafter date_birth year_birth month_birth SLA edu16 edu17 edu18 edu19 college Alevel Olevel CSE CSE_Olevel no_qualification schooling height weight BMI BMI1 BMI2 overweight overweight1 overweight2 obese obese1 obese2 waist_hip bf_percent bps bpd fev1_1 fev1_2 n_fev1_1 n_fev1_2 fvc_1 fvc_2 n_fvc_1 n_fvc_2 fev1ratio_1 fev1ratio_2 pef_1 pef_2 n_pef_1 n_pef_2 age age2 male white mixed_ethnicity asian black other_ethnicity NR_ethnicity Wales Scotland Jan Feb Mar Apr May Jun Jul Aug Oct Nov Dec inc_18k_orless inc_31k_orless inc_52k_orless hhld_income inc_100k_orless car_0 car_1_orless car_2_orless car_3_orless townsend own  NO pm25 NO2 pm10 employed retired n_806_0_0 n_806_1_0 n_806_2_0 n_816_0_0 n_816_1_0 n_816_2_0 calories* fat* carbs* saturated_fat* sugars* smokes_now ever_smoked acc_* bp_medication bp_diagnosis birthplace_East birthplace_North mbirthplace leg_length trunk_length breastfed breastfed_DNK msmoking msmoking_DNK right_handed left_handed ambidextrous adopted twins, after(ID)

	
	***************************************
	* Restricting to obs within bandwidth *
	***************************************
	
	// Birth dates from 1 Sep 1952 to 31 Aug 1962 inclusive; rows with a
	// missing date_birth fall outside the range and are removed.
	keep if inrange(date_birth, td(01sep1952), td(31aug1962))

	***************************************
	* Drop if school leaving age missing  *
	***************************************
	
	// edu16 is missing exactly when SLA is missing.
	keep if edu16 != .
	
	***************************************
	* 		Triangular weights 			  *
	***************************************
	
	*gen w = $bw - abs(DoB) - after
		*replace w = 0 if w < 0
		
	// Triangular kernel weight w = K - |DoB| - after, where K is the larger of
	// (|min DoB| + 1) and (max DoB + 2) in the estimation sample. K is held in
	// a temporary scalar; float() reproduces the precision of the float helper
	// variables this replaces.
	tempname kernel_span
	qui summ DoB
	scalar `kernel_span' = float(max(abs(r(min)) + 1, r(max) + 2))
	qui gen w = `kernel_span' - abs(DoB) - after

	***************************************
	* 				Trends 				  *
	***************************************
	
	// Trend specification is selected by global polynomial; any value other
	// than "linear" or "quadratic" leaves both globals untouched.
	if "$polynomial" == "linear" {
		global pretrends	"c.DoB"
		global posttrends	"c.DoBafter" 	
	}
	else if "$polynomial" == "quadratic" {
		global pretrends	"c.DoB##c.DoB"
		global posttrends	"c.DoBafter##c.DoBafter" 	
	}

	**********************************
	* Program that generates indexes *
	**********************************
	
	// mergevars_ROSLA varlist, generate(newvar)
	//
	// Builds a summary index from the numeric variables in varlist:
	//   1. weights = invsym(covariance matrix) x vector of ones, with the
	//      covariance estimated on observations with DoB < 0;
	//   2. newvar  = weighted sum of the input variables;
	//   3. newvar is then centred and scaled by its w-weighted mean and sd
	//      among observations with DoB between -365 and -1.
	// Requires DoB and w in memory. Leaves matrices covmat, theones and
	// theweight behind, and drops a variable named u if one exists.
	capture program drop mergevars_ROSLA
	program define mergevars_ROSLA, rclass

		version 13
		syntax varlist(numeric min=1), Generate(name)

		confirm new variable `generate'

		local nvars : word count `varlist'
		quietly correlate `varlist' if DoB < 0, covariance
		matrix covmat    = r(C)
		matrix theones   = J(`nvars', 1, 1)
		matrix theweight = invsym(covmat) * theones

		// Accumulate the weighted sum one component at a time.
		quietly generate `generate' = 0
		forvalues pos = 1/`nvars' {
			local component : word `pos' of `varlist'
			quietly replace `generate' = `generate' + `component' * theweight[`pos', 1]
		}

		// Standardise against the reference group.
		quietly summarize `generate' [aw=w] if inrange(DoB, -365, -1)
		quietly replace `generate' = (`generate' - r(mean)) / r(sd)
		capture drop u

	end

	************************************************
	*** Program that standardizes outcome variables
	************************************************

	// standardize "var1 var2 ..."
	//
	// Replaces each listed variable, in place, by its z-score computed
	// separately for male == 0 and male == 1. The mean and sd come from the
	// w-weighted distribution among observations with DoB between -365 and -1.
	// The variable list is passed as one quoted argument (positional macro 1).
	// Requires DoB, male and w in memory.
	capture prog drop standardize
	program define standardize
	
		foreach measure in `1'  {

			// Scratch variable with a guaranteed-unique temporary name. A
			// fixed name such as temp would clash with an existing variable
			// and would be left behind if the program stopped part-way,
			// making the next call fail.
			tempvar zscore
			gen `zscore' = .
			foreach k in 0 1 {

				summ `measure' [aw=w] 	if DoB >= -365 & DoB <= -1 & male == `k'
				replace `zscore' = (`measure' - r(mean))/r(sd) 	if male == `k'	
			
			}
			
			replace `measure' = `zscore'
			drop `zscore'	
			
		}

	end
	
	**********************************
	*  		Standardize outcomes     *
	**********************************

	// Keep unstandardised copies of three outcomes before they are overwritten.
	foreach outcome in BMI bpd bps {
		clonevar raw_`outcome' = `outcome'
	}
	// The outcomes below are replaced in place by the standardize program.
	qui standardize "BMI bf_percent waist_hip bps bpd fev1_1 fvc_1 pef_1" 
	
	**********************************
	*  Generating health indexes     *
	**********************************
	
	*** Changing sign of FEV1, FVC, and PEF so a higher index < = > worse health
	// Sign-flipped copies of the spirometry outcomes, used only as index inputs.
	foreach lung in fev1_1 fvc_1 pef_1 {
		gen m_`lung' = -`lung'
	}
	
	// Domain indexes first, then the overall index built from the three of them.
	mergevars_ROSLA BMI bf_percent waist_hip, generate(index_anthro)
	mergevars_ROSLA bps bpd, generate(index_bp)
	mergevars_ROSLA m_fev1_1 m_fvc_1 m_pef_1, generate(index_spiro)
	mergevars_ROSLA index_anthro index_bp index_spiro, generate(index_health)
	
	drop m_fev1_1 m_fvc_1 m_pef_1
	
	label var index_anthro "Index anthropometrics/bioimpedance"
	label var index_bp     "Index blood pressure"
	label var index_spiro  "Index spirometry"
	label var index_health "Summary health index"
	
	order index_health index_anthro index_bp index_spiro, before(height)
	
	// Pollution index from the four air-pollution measures.
	mergevars_ROSLA NO NO2 pm25 pm10, generate(index_pollution)
	label var index_pollution "Index Pollution"
	order index_pollution, after(pm10)
	
	
